
## Progress banner for the console/log.
cat('Creating Tables A14-A16 and Figure 4.3 and A1 \n\n')

## Full data set, plus the subset of rows with south == 0 that is used for
## the non-South regressions.
dat <- read.csv('TablesA14A15A16Figures4_3A1_data.csv')
non.south.dat <- dat[dat$south == 0, ]

## Possible correct answers for the chosen block.
## Each vector lists the `polyorigin` codes that count as a correct answer
## for that group: the group's full name, its initial, and every
## multi-letter code containing that initial.
asian.corrects <- c(
  'Asian', 'A',
  'AB', 'ABH', 'ABHW', 'ABW', 'AH', 'AHW', 'AW'
)
black.corrects <- c(
  'Black', 'B',
  'AB', 'ABH', 'ABHW', 'ABW', 'BH', 'BHW', 'BW'
)
hispanic.corrects <- c(
  'Hispanic', 'H',
  'ABH', 'ABHW', 'AH', 'AHW', 'BH', 'BHW', 'HW'
)
white.corrects <- c(
  'White', 'W',
  'ABHW', 'ABW', 'AHW', 'AW', 'BHW', 'BW', 'HW'
)

## Table A14: mean of the *.correct.* indicators at three geographic levels
## (tract, block group, block group plus neighbors), next to the matching
## prop.chance.* mean, plus the number of rows behind each group column.

## Row selectors, one per group: rows whose polyorigin code is among that
## group's accepted answers. %in% never yields NA, so these are safe to sum.
asian.rows <- dat$polyorigin %in% asian.corrects
black.rows <- dat$polyorigin %in% black.corrects
hispanic.rows <- dat$polyorigin %in% hispanic.corrects
white.rows <- dat$polyorigin %in% white.corrects

## Storage matrix; every cell starts as NA.
outmat <- matrix(
  nrow = 4,
  ncol = 5,
  dimnames = list(
    c('Tracts', 'Block Groups', 'Block Groups and Neighbors', 'N'),
    c('Chance', 'Asian', 'Black', 'Hispanic', 'White')
  )
)

## Tract-level means (used below too, for the bar chart).
## NOTE(review): the chance benchmark is averaged over the Black subset
## only -- confirm that this is the intended reference group.
ac <- mean(dat$asian.correct.tract[asian.rows])
bb <- mean(dat$prop.chance.tracts[black.rows])
bc <- mean(dat$black.correct.tract[black.rows])
hc <- mean(dat$hispanic.correct.tract[hispanic.rows])
wc <- mean(dat$white.correct.tract[white.rows])

## Fill the table one row at a time, in column order
## Chance, Asian, Black, Hispanic, White.
outmat['Tracts', ] <- c(bb, ac, bc, hc, wc)

outmat['Block Groups', ] <- c(
  mean(dat$prop.chance.bg[black.rows]),
  mean(dat$asian.correct.bg[asian.rows]),
  mean(dat$black.correct.bg[black.rows]),
  mean(dat$hispanic.correct.bg[hispanic.rows]),
  mean(dat$white.correct.bg[white.rows])
)

outmat['Block Groups and Neighbors', ] <- c(
  mean(dat$prop.chance.adj[black.rows]),
  mean(dat$asian.correct.adj[asian.rows]),
  mean(dat$black.correct.adj[black.rows]),
  mean(dat$hispanic.correct.adj[hispanic.rows]),
  mean(dat$white.correct.adj[white.rows])
)

## Group sizes; the Chance cell of this row is left as NA.
outmat['N', c('Asian', 'Black', 'Hispanic', 'White')] <- c(
  sum(asian.rows),
  sum(black.rows),
  sum(hispanic.rows),
  sum(white.rows)
)

outmat <- round(outmat, 2)
outtable <- xtable(outmat)

## Write the bare tabular (no floating table environment) as LaTeX.
print.xtable(
  outtable,
  file = 'TableA14.tex',
  floating = FALSE,
  type = 'latex'
)


##segregation results
## Table A15: for each group, an OLS regression of the tract-level correct
## indicator on dissimilarity, group percent and the chance proportion,
## fitted on the full sample. cl() is called with `metroid` as the cluster
## variable and its result is stored on the model as `$se` before the four
## models are handed to apsrtable().

## Common column names, so that all four models share one formula and one
## set of coefficient labels. Six columns are selected below (STATE_FIPS is
## carried along), so six names are required: with only five, the last
## column would be left with an NA name.
a15.cols <- c('correct.tract', 'dissimilarity', 'group.percent',
              'prop.chance', 'metroid', 'STATE_FIPS')

asian.dat <- dat[dat$polyorigin %in% asian.corrects, ]
##remove non complete cases so that clustered errors work. Do this separately for each group
asian.dat <- asian.dat[
  complete.cases(asian.dat[, c('dissimilarity.asian', 'percent.asian',
                               'prop.chance.tracts', 'metroid')]),
  c('asian.correct.tract', 'dissimilarity.asian', 'percent.asian',
    'prop.chance.tracts', 'metroid', 'STATE_FIPS')
]
colnames(asian.dat) <- a15.cols
asian.reg <- lm(correct.tract ~ dissimilarity + group.percent + prop.chance,
                data = asian.dat)
clustered.se <- cl(asian.dat, asian.reg, asian.dat$metroid)
asian.reg$se <- clustered.se

black.dat <- dat[dat$polyorigin %in% black.corrects, ]
black.dat <- black.dat[
  complete.cases(black.dat[, c('dissimilarity.black', 'percent.black',
                               'prop.chance.tracts', 'metroid')]),
  c('black.correct.tract', 'dissimilarity.black', 'percent.black',
    'prop.chance.tracts', 'metroid', 'STATE_FIPS')
]
colnames(black.dat) <- a15.cols ##rename to keep consistent in regressions below
black.reg <- lm(correct.tract ~ dissimilarity + group.percent + prop.chance,
                data = black.dat)
clustered.se <- cl(black.dat, black.reg, black.dat$metroid)
black.reg$se <- clustered.se

hispanic.dat <- dat[dat$polyorigin %in% hispanic.corrects, ]
hispanic.dat <- hispanic.dat[
  complete.cases(hispanic.dat[, c('dissimilarity.hispanic', 'percent.hispanic',
                                  'prop.chance.tracts', 'metroid')]),
  c('hispanic.correct.tract', 'dissimilarity.hispanic', 'percent.hispanic',
    'prop.chance.tracts', 'metroid', 'STATE_FIPS')
]
colnames(hispanic.dat) <- a15.cols ##rename to keep consistent in regressions below
hispanic.reg <- lm(correct.tract ~ dissimilarity + group.percent + prop.chance,
                   data = hispanic.dat)
clustered.se <- cl(hispanic.dat, hispanic.reg, hispanic.dat$metroid)
hispanic.reg$se <- clustered.se

white.dat <- dat[dat$polyorigin %in% white.corrects, ]
white.dat <- white.dat[
  complete.cases(white.dat[, c('dissimilarity.white', 'percent.white',
                               'prop.chance.tracts', 'metroid')]),
  c('white.correct.tract', 'dissimilarity.white', 'percent.white',
    'prop.chance.tracts', 'metroid', 'STATE_FIPS')
]
colnames(white.dat) <- a15.cols ##rename to keep consistent in regressions below
white.reg <- lm(correct.tract ~ dissimilarity + group.percent + prop.chance,
                data = white.dat)
clustered.se <- cl(white.dat, white.reg, white.dat$metroid)
white.reg$se <- clustered.se

## Display labels, in the order of the model coefficients.
coef.names <- c('Intercept', 'Segregation', 'Group Population', 'Proportion Chance')

outtable <- apsrtable(asian.reg,
                      black.reg,
                      hispanic.reg,
                      white.reg,
                      Sweave = TRUE,
                      coef.names = coef.names,
                      notes = '',
                      stars = 'default'
                      )
writeLines(
  outtable, 'TableA15.tex')


##do the same for the non-South
## Table A16: the same four per-group regressions, fitted on non.south.dat
## (rows with south == 0) instead of the full sample.

## Shared column names; renaming keeps the formula identical across groups.
a16.names <- c('correct.tract', 'dissimilarity', 'group.percent', 'prop.chance', 'metroid')

## Asian: keep rows that are complete on the regressors and cluster id.
asian.needed <- c('dissimilarity.asian', 'percent.asian', 'prop.chance.tracts', 'metroid')
asian.dat <- non.south.dat[non.south.dat$polyorigin %in% asian.corrects, ]
asian.dat <- asian.dat[complete.cases(asian.dat[, asian.needed]),
                       c('asian.correct.tract', asian.needed)]
names(asian.dat) <- a16.names
asian.reg1 <- lm(correct.tract ~ dissimilarity + group.percent + prop.chance,
                 data = asian.dat)
clustered.se <- cl(asian.dat, asian.reg1, asian.dat$metroid)
asian.reg1$se <- clustered.se

## Black
black.needed <- c('dissimilarity.black', 'percent.black', 'prop.chance.tracts', 'metroid')
black.dat <- non.south.dat[non.south.dat$polyorigin %in% black.corrects, ]
black.dat <- black.dat[complete.cases(black.dat[, black.needed]),
                       c('black.correct.tract', black.needed)]
names(black.dat) <- a16.names
black.reg1 <- lm(correct.tract ~ dissimilarity + group.percent + prop.chance,
                 data = black.dat)
clustered.se <- cl(black.dat, black.reg1, black.dat$metroid)
black.reg1$se <- clustered.se

## Hispanic
hispanic.needed <- c('dissimilarity.hispanic', 'percent.hispanic', 'prop.chance.tracts', 'metroid')
hispanic.dat <- non.south.dat[non.south.dat$polyorigin %in% hispanic.corrects, ]
hispanic.dat <- hispanic.dat[complete.cases(hispanic.dat[, hispanic.needed]),
                             c('hispanic.correct.tract', hispanic.needed)]
names(hispanic.dat) <- a16.names
hispanic.reg1 <- lm(correct.tract ~ dissimilarity + group.percent + prop.chance,
                    data = hispanic.dat)
clustered.se <- cl(hispanic.dat, hispanic.reg1, hispanic.dat$metroid)
hispanic.reg1$se <- clustered.se

## White
white.needed <- c('dissimilarity.white', 'percent.white', 'prop.chance.tracts', 'metroid')
white.dat <- non.south.dat[non.south.dat$polyorigin %in% white.corrects, ]
white.dat <- white.dat[complete.cases(white.dat[, white.needed]),
                       c('white.correct.tract', white.needed)]
names(white.dat) <- a16.names
white.reg1 <- lm(correct.tract ~ dissimilarity + group.percent + prop.chance,
                 data = white.dat)
clustered.se <- cl(white.dat, white.reg1, white.dat$metroid)
white.reg1$se <- clustered.se

## Display labels, in coefficient order.
coef.names <- c('Intercept', 'Segregation', 'Group Population', 'Proportion Chance')

outtable <- apsrtable(
  asian.reg1,
  black.reg1,
  hispanic.reg1,
  white.reg1,
  Sweave = TRUE,
  coef.names = coef.names,
  notes = '',
  stars = 'default'
)
writeLines(outtable, 'TableA16.tex')



##Bar chart
## Figure 4.3: bars for the tract-level means computed for Table A14
## (ac, bc, hc, wc), with the chance benchmark (bb) as the last bar.

## Five shades from the 9-step 'Greys' palette, one per bar.
colors <- brewer.pal(9, 'Greys')[c(4:7, 9)]

jpeg('Figure4_3.jpeg',
     width = 4.66,
     height = 5.995,
     units = 'in',
     res = 1200,
     pointsize = 10)
heights <- c(ac, bc, hc, wc, bb)
par(mar = c(3, 4, 2, 2))
par(las = 1) ## las = 1: axis labels drawn horizontally
## barplot() returns the bar midpoints, which position the labels below.
xs <- barplot(heights,
              #density = densities
              col = colors,
              border = NA
)

## Group labels under the bars, in the same order as `heights`.
## lty = 0 suppresses the axis line and tick marks; only the text is drawn.
axis(side = 1,
     at = xs,
     labels = c('Asian', 'Black', 'Hispanic', 'White', 'Chance'),
     lty = 0,
     line = -.75,
     cex.axis = 1)
dev.off()


##scatter plots
## Figure A1, Asian panel: rows are grouped into bins of asian.dev.abs, and
## each bin is plotted at (mean deviation, mean of asian.correct.tract),
## with point size proportional to the number of rows in the bin.
use.dat <- dat[dat$polyorigin %in% asian.corrects,
               c('asian.dev.abs', 'asian.correct.tract')]
xs <- use.dat$asian.dev.abs
ys <- use.dat$asian.correct.tract
use.dat <- as.data.frame(cbind(xs, ys))
use.dat$bins <- cut(xs, breaks = 10) ## 10 intervals spanning the range of xs
use.dat <- ddply(use.dat, .(bins), summarise,
                 count = length(xs),
                 ys_mean = mean(ys, na.rm = TRUE),
                 xs_mean = mean(xs, na.rm = TRUE))

asian.plot <- ggplot(use.dat, aes(x = xs_mean, y = ys_mean)) +
  geom_point(aes(size = count)) +
  scale_size_continuous(range = c(2, 12)) +
  labs(x = "Absolute Deviation of Guess from Actual Proportion",
       y = "Proportion Correctly Identifying Location",
       title = 'Asian') +
  theme(
    plot.title = element_text(size = 30),
    axis.title = element_text(size = 24),
    axis.text = element_text(size = 20),
    legend.position = "none"
  )

## Pass the plot explicitly: without `plot =`, ggsave() writes whatever
## last_plot() holds, which depends on global state rather than on this object.
ggsave("FigureA_1Asian.jpeg",
       plot = asian.plot,
       dpi = 600,
       width = 8.5,
       height = 8.5,
       units = 'in')

## Figure A1, Black panel: rows are grouped into bins of black.dev.abs, and
## each bin is plotted at (mean deviation, mean of black.correct.tract),
## with point size proportional to the number of rows in the bin.
use.dat <- dat[dat$polyorigin %in% black.corrects,
               c('black.dev.abs', 'black.correct.tract')]
xs <- use.dat$black.dev.abs
ys <- use.dat$black.correct.tract
use.dat <- as.data.frame(cbind(xs, ys))
use.dat$bins <- cut(xs, breaks = 10) ## 10 intervals spanning the range of xs
use.dat <- ddply(use.dat, .(bins), summarise,
                 count = length(xs),
                 ys_mean = mean(ys, na.rm = TRUE),
                 xs_mean = mean(xs, na.rm = TRUE))

black.plot <- ggplot(use.dat, aes(x = xs_mean, y = ys_mean)) +
  geom_point(aes(size = count)) +
  scale_size_continuous(range = c(2, 12)) +
  labs(x = "Absolute Deviation of Guess from Actual Proportion",
       y = "Proportion Correctly Identifying Location",
       title = 'Black') +
  theme(
    plot.title = element_text(size = 30),
    axis.title = element_text(size = 24),
    axis.text = element_text(size = 20),
    legend.position = "none"
  )
## Pass the plot explicitly: without `plot =`, ggsave() writes whatever
## last_plot() holds, which depends on global state rather than on this object.
ggsave("FigureA_1Black.jpeg",
       plot = black.plot,
       dpi = 600,
       width = 8.5,
       height = 8.5,
       units = 'in')


## Figure A1, Hispanic panel: rows are grouped into bins of
## hispanic.dev.abs, and each bin is plotted at (mean deviation, mean of
## hispanic.correct.tract), with point size proportional to the number of
## rows in the bin.
use.dat <- dat[dat$polyorigin %in% hispanic.corrects,
               c('hispanic.dev.abs', 'hispanic.correct.tract')]
xs <- use.dat$hispanic.dev.abs
ys <- use.dat$hispanic.correct.tract
use.dat <- as.data.frame(cbind(xs, ys))
use.dat$bins <- cut(xs, breaks = 10) ## 10 intervals spanning the range of xs
use.dat <- ddply(use.dat, .(bins), summarise,
                 count = length(xs),
                 ys_mean = mean(ys, na.rm = TRUE),
                 xs_mean = mean(xs, na.rm = TRUE))

hispanic.plot <- ggplot(use.dat, aes(x = xs_mean, y = ys_mean)) +
  geom_point(aes(size = count)) +
  scale_size_continuous(range = c(2, 12)) +
  labs(x = "Absolute Deviation of Guess from Actual Proportion",
       y = "Proportion Correctly Identifying Location",
       title = 'Hispanic') +
  theme(
    plot.title = element_text(size = 30),
    axis.title = element_text(size = 24),
    axis.text = element_text(size = 20),
    legend.position = "none"
  )
## Pass the plot explicitly: without `plot =`, ggsave() writes whatever
## last_plot() holds, which depends on global state rather than on this object.
ggsave("FigureA_1Hispanic.jpeg",
       plot = hispanic.plot,
       dpi = 600,
       width = 8.5,
       height = 8.5,
       units = 'in')


## Figure A1, White panel: rows are grouped into bins of white.dev.abs, and
## each bin is plotted at (mean deviation, mean of white.correct.tract),
## with point size proportional to the number of rows in the bin.
use.dat <- dat[dat$polyorigin %in% white.corrects,
               c('white.dev.abs', 'white.correct.tract')]
xs <- use.dat$white.dev.abs
ys <- use.dat$white.correct.tract
use.dat <- as.data.frame(cbind(xs, ys))
use.dat$bins <- cut(xs, breaks = 10) ## 10 intervals spanning the range of xs
use.dat <- ddply(use.dat, .(bins), summarise,
                 count = length(xs),
                 ys_mean = mean(ys, na.rm = TRUE),
                 xs_mean = mean(xs, na.rm = TRUE))

white.plot <- ggplot(use.dat, aes(x = xs_mean, y = ys_mean)) +
  geom_point(aes(size = count)) +
  scale_size_continuous(range = c(2, 12)) +
  labs(x = "Absolute Deviation of Guess from Actual Proportion",
       y = "Proportion Correctly Identifying Location",
       title = 'White') +
  theme(
    plot.title = element_text(size = 30),
    axis.title = element_text(size = 24),
    axis.text = element_text(size = 20),
    legend.position = "none"
  )
## Pass the plot explicitly: without `plot =`, ggsave() writes whatever
## last_plot() holds, which depends on global state rather than on this object.
ggsave("FigureA_1White.jpeg",
       plot = white.plot,
       dpi = 600,
       width = 8.5,
       height = 8.5,
       units = 'in')



